import csv
import generatefeedvector as generate
import pandas as pd
import json

# download_text builds a dictionary with the following structure:
# {blog_id: (politician_name, {word1: num_occurrences, word2: num_occurrences, ...})}
def download_text(df):
    """Fetch the word counts of every blog listed in ``df``.

    Each value of the ``'url'`` column is passed to
    ``generate.getwordcounts``. Blogs whose download or parsing raises are
    not fatal: their index label is recorded and processing continues.

    Args:
        df: DataFrame with a ``'url'`` column, one row per blog.

    Returns:
        A ``(dicc, invalid_blogs)`` pair where ``dicc`` maps each row's index
        label to whatever ``generate.getwordcounts`` returned for its URL
        (presumably a ``(title, {word: count})`` pair -- confirm against
        ``generatefeedvector``), and ``invalid_blogs`` is the list of index
        labels whose URL could not be processed.
    """
    # Local import keeps this block self-contained; sys.stdout.write emits
    # the same bytes on Python 2 and Python 3, unlike the print statement.
    import sys

    dicc = {}
    invalid_blogs = []
    # Walk index labels and URLs in lockstep so the label is never
    # reinterpreted as a position (which breaks on non 0..n-1 indexes).
    for label, url in zip(df.index, df['url']):
        try:
            dicc[label] = generate.getwordcounts(url)
        except Exception:
            # Best-effort download: any failure marks the blog as invalid.
            # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit
            # still abort a long run.
            invalid_blogs.append(label)
    sys.stdout.write("\nLectura dels blogs finalizada.\n")
    return dicc, invalid_blogs

# BUILD THE DATAFRAME
# blogs.dat holds one blog per line with fields separated by '::'.
unames = ['id_blog', 'partit_politic', 'nom', 'url']
# A multi-character separator is only supported by the python parsing
# engine; requesting it explicitly avoids the implicit fallback and its
# ParserWarning.
df = pd.read_table('blogs.dat', sep='::', header=None, names=unames,
                   engine='python')

# EXAMPLE OF HOW TO DOWNLOAD A SET OF BLOGS
data_blogs, invalid_blogs = download_text(df)

# Persist the downloaded word counts and the list of blogs that failed.
with open('data_blogs.json', 'w') as f:
    json.dump(data_blogs, f)

with open('invalid_blogs.json', 'w') as f:
    json.dump(invalid_blogs, f)
